*****************************************************************************************************
* Purpose: Clean and analyze ELSA data for diabetes-mortality paper
* Written by: Hunter Green and David Flood
* Last updated: 2024-12-28
* Stata version: 18.0
*****************************************************************************************************

*****************************************************************************************************
* Options, global macros, open log
*****************************************************************************************************
* Options
version 18.0
clear all
set more off
set varabbrev off
pause on

* Use the macro $S_DATE to save files with current date
global date : disp %tdCCYY.NN.DD date("$S_DATE","DMY")

* Global folder macros
* ELSA public data (edition 39, downloaded 2023-11-23)
global elsa_public "CHANGE ME\UKDA-5050-stata\stata\stata13_se"
* ELSA linked mortality data
global elsa_mortality "CHANGE ME"
*output
global output "C:\Users\Paola\OneDrive - University College London\ELSA\Papers in progress\HRS family diabetes"

* Open log
capture log close
log using "${output}\ELSA_diabetes_mortality_log_${date}.log", replace


*****************************************************************************************************
* Format mortality data
*****************************************************************************************************
* ELSA mortality data
use "${elsa_mortality}\Mortality_2018.dta", clear

* If ELSA death dates are stuctured as multiple numeric variables for day, month, and year
gen deathmo = MDeath
gen deathyr = YDeath

* Just making sure variables were loaded
summ deathmo
summ deathyr
replace deathmo =. if deathmo < 0

* Save data
save elsa_mortality.dta, replace


*****************************************************************************************************
* Clean mortality data
*****************************************************************************************************
* Harmonized ELSA version G.3
use idauniq inw6 inw7 inw8 inw9 r6iwindm r7iwindm r8iwindm r9iwindm r6iwindy r7iwindy r8iwindy ///
    r9iwindy using "${elsa_public}\h_elsa_g3.dta", clear

* ELSA mortality data
merge 1:1 idauniq using elsa_mortality.dta, keepusing(deathmo deathyr)
drop _merge

* Keep if in ELSA Wave 6 interview
keep if inw6 == 1

* Wave 6 (May 2012 - May 2013)
* Year and month of Wave 6 interview
gen w6_ym = ym(r6iwindy, r6iwindm)
format w6_ym %tm
drop r6iwindm r6iwindy

* Wave 7 (June 2014 - May 2015)
* Year and month of Wave 7 interview
gen w7_ym = ym(r7iwindy, r7iwindm)
format w7_ym %tm
drop r7iwindm r7iwindy

* Wave 8 (May 2016 - June 2017)
* Year and month of Wave 8 interview
gen w8_ym = ym(r8iwindy, r8iwindm)
format w8_ym %tm
drop r8iwindm r8iwindy

* Wave 9 (July 2018 - July 2019)
* Year and month of Wave 8 interview
gen w9_ym = ym(r9iwindy, r9iwindm)
format w9_ym %tm
drop r9iwindm r9iwindy

* Year and month of biomarker collection (same as Wave 6 interview)
gen biomarker_ym = w6_ym
format biomarker_ym %tm

* Year and month of death
**There is one record of death from 2002, let's delete it
tab deathyr
drop if deathyr == 2002

gen death_ym = ym(deathyr, deathmo)
format death_ym %tm
summ death_ym

gen deceased = 1 if !mi(death_ym)

* Censor
* Indicator for end of study (I think this is last month/year of death data due to published papers I've seen)
***The last mortality record is 2018,04 so I have changed the end of follow up to May 2018, I hope this is ok
gen end_study_ym = ym(2018, 05)
format end_study_ym %tm

* Year and month of censor
gen censor_ym =.
replace censor_ym = end_study_ym if !mi(w9_ym) & mi(death_ym) & mi(censor_ym)
replace censor_ym = w8_ym if !mi(w8_ym) & mi(w9_ym) & mi(death_ym) & mi(censor_ym)
replace censor_ym = w7_ym if !mi(w7_ym) & mi(w8_ym) & mi(w9_ym) & mi(death_ym) & mi(censor_ym)
format censor_ym %tm
drop deathyr deathmo end_study_ym

* Order variables
order idauniq inw6 inw7 inw8 inw9 w6_ym w7_ym w8_ym w9_ym biomarker_ym deceased death_ym censor_ym

* Label variables	
label variable inw6 "ELSA in Wave 6 sample"
label variable inw7 "ELSA in Wave 7 sample"
label variable inw8 "ELSA in Wave 8 sample"
label variable inw9 "ELSA in Wave 9 sample"
label variable w6_ym "ELSA Wave 6 interview year & month"
label variable w7_ym "ELSA Wave 7 interview year & month"
label variable w8_ym "ELSA Wave 8 interview year & month"
label variable w9_ym "ELSA Wave 9 interview year & month"
label variable biomarker_ym "ELSA year & month of biomarker collection"
label variable deceased "ELSA R is deceased"
label variable death_ym "ELSA year & month of mortality"
label variable censor_ym "ELSA year & month of censor"

* Save data
save elsa_dates.dta, replace


*****************************************************************************************************
* Clean data
*****************************************************************************************************
* Harmonized ELSA version G.3
use idauniq r6strat r6clust r6agey ragender raeducl r6diabe r6rxdiab r6smoken r6mbmi h6itot using "${elsa_public}\h_elsa_g3.dta", clear

* ELSA Wave 6 biomarker data
merge 1:1 idauniq using "${elsa_public}\wave_6_elsa_nurse_data_v2.dta", keepusing(hba1c w6bldwt)
drop _merge

* ELSA Dates
merge 1:1 idauniq using elsa_dates.dta, keepusing(inw6 biomarker_ym deceased death_ym censor_ym)
drop _merge

* Keep if in ELSA Wave 6 interview
keep if inw6 == 1

* Assign value Labels
label define age3 1 "1.51-59" 2 "2.60-69" 3 "3.70+"
label define age11 1 "1.51-54" 2 "2.55-59" 3 "3.60-64" 4 "4.65-69" 5 "5.70-74" ///
                   6 "6.75-79" 7 "7.80-84" 8 "8.85-89" 9 "9.90-94" 10 "10.95-99" 11 "11.100+"
label define sex 1 "1.male" 2 "2.female"
label define yesno 0 "0.no" 1 "1.yes"
label define diab_3cat 0 "0.no diabetes" 1 "1.undiagnosed diabetes" 2 "2.diagnosed diabetes"
label define cendied 0 "0.censored" 1 "1.died"
label define bmi4 1 "1.<18.5 (underweight)" 2 "2.18.5-24.9 (normal weight)" 3 "3.25.0-29.9 (overweight)" ///
                  4 "4.30+ (obese)"
label define tertile 1 "1.low" 2 "2.middle" 3 "3.high"
label define educ3 1 "1.lt upper secondary" 2 "2.upper secondary and vocational" 3 "3.tertiary"
label define educ2 1 "1.lt upper secondary" 2 "2.upper secondary and greater"

* ID
rename idauniq id
label variable id "id: ELSA ID (idauniq)"

* Weight
rename w6bldwt e_weight
label variable e_weight "e_weight: ELSA weight"

* Design variables - variables are flipped in Harmonized ELSA
rename r6strat e_cluster
label variable e_cluster "e_cluster: ELSA cluster (r6strat)"

rename r6clust e_strata
label variable e_strata "e_strata: ELSA strata (r6clust)"

* Age at biomarker collection
gen agey =.
replace agey = r6agey if !mi(r6agey)
label variable agey "agey: age at biomarker collection (years)"
drop r6agey

* Age category
gen agecat =.
replace agecat = 1 if inrange(agey,51,59)
replace agecat = 2 if inrange(agey,60,69)
replace agecat = 3 if inrange(agey,70,200)
label variable agecat "agecat: age category"
label values agecat age3

* create variable for age standardization - age groups
gen agevar =.
replace agevar = 1 if inrange(agey,51,54)
replace agevar = 2 if inrange(agey,55,59)
replace agevar = 3 if inrange(agey,60,64)
replace agevar = 4 if inrange(agey,65,69)
replace agevar = 5 if inrange(agey,70,74)
replace agevar = 6 if inrange(agey,75,79)
replace agevar = 7 if inrange(agey,80,84)
replace agevar = 8 if inrange(agey,85,89)
replace agevar = 9 if inrange(agey,90,94)
replace agevar = 10 if inrange(agey,95,99)
replace agevar = 11 if inrange(agey,100,200)
label variable agevar "agevar: age category for standardization"
label values agevar age11

* create variable for age standardization - population standard weights
* Ahmad OB et al. Age Standardization of Rates: A New WHO Standard. World Health Organization. 2001.
gen ageweightvar =.
replace ageweightvar = 0.2454857 if agevar == 1
replace ageweightvar = 0.2080000 if agevar == 2
replace ageweightvar = 0.1700571 if agevar == 3
replace ageweightvar = 0.1353143 if agevar == 4
replace ageweightvar = 0.1010286 if agevar == 5
replace ageweightvar = 0.0694857 if agevar == 6
replace ageweightvar = 0.0416000 if agevar == 7
replace ageweightvar = 0.0201143 if agevar == 8
replace ageweightvar = 0.0068571 if agevar == 9
replace ageweightvar = 0.0018286 if agevar == 10
replace ageweightvar = 0.0002286 if agevar == 11
label variable ageweightvar "ageweightvar: population standard weights for standardization"

* Gender
gen gender =.
replace gender = ragender if !mi(ragender)
label variable gender "gender: gender"
label values gender sex
drop ragender

* Education
gen educ =.
replace educ = raeducl if !mi(raeducl)
label variable educ "educ: education"
label values educ educ3
drop raeducl

* Education
gen educ2 =.
replace educ2 = 1 if educ == 1
replace educ2 = 2 if inlist(educ,2,3)
label variable educ2 "educ2: education (2 categories)"
label values educ2 educ2

* Household economic status
gen hh_econ =.
replace hh_econ = h6itot if !mi(h6itot)
drop h6itot

* Smoking status at biomarker collection
gen smoker =.
replace smoker = r6smoken if !mi(r6smoken)
label variable smoker "smoker: smoking status at biomarker collection"
label values smoker yesno
drop r6smoken

* BMI at biomarker collection
gen mbmi =.
replace mbmi = r6mbmi if !mi(r6mbmi)
label variable mbmi "mbmi: measured BMI at biomarker collection"
drop r6mbmi

* Categorical bmi variable
gen bmicat =.
replace bmicat = 1 if inrange(mbmi,0,18.499999)
replace bmicat = 2 if inrange(mbmi,18.5,24.999999)
replace bmicat = 3 if inrange(mbmi,25,29.999999)
replace bmicat = 4 if inrange(mbmi,30,3000)
label variable bmicat "bmicat: measured BMI category"
label values bmicat bmi4

* Diabetes diagnosis at biomarker collection
gen diagdiab =.
replace diagdiab = r6diabe if !mi(r6diabe)
label variable diagdiab  "diagdiab: diagnosed diabetes"
label values diagdiab yesno
drop r6diabe

* Diabetes medication at biomarker collection
gen rxdiab =.
replace rxdiab = r6rxdiab if !mi(r6rxdiab)
label variable rxdiab  "rxdiab: diagnosed medication (oral or insulin)"
label values rxdiab yesno
drop r6rxdiab

* Diabetes medication among diagnosed
gen rxdiab_among_diag =.
replace rxdiab_among_diag = 0 if diagdiab == 1 & rxdiab == 0
replace rxdiab_among_diag = 1 if diagdiab == 1 & rxdiab == 1
label variable rxdiab_among_diag "rxdiab_among_diag: diabetes medication among diagnosed"
label values rxdiab_among_diag yesno

* HbA1c
gen e_hba1c =.
replace e_hba1c = hba1c if inrange(hba1c,15,137)
label variable e_hba1c "e_hba1c: HbA1c (%)"
drop hba1c
rename e_hba1c hba1c

* Measured hba1c >= 6.5%
gen a1c_ge_65 =.
replace a1c_ge_65 = 0 if inrange(hba1c,15,47)
replace a1c_ge_65 = 1 if inrange(hba1c,48,137)
label variable a1c_ge_65 "a1c_ge_65: measured hba1c >= 6.5%"
label values a1c_ge_65 yesno

* Total diabetes (self-report diagnosis + biomarker)
gen totdiab =.
replace totdiab = 0 if diagdiab == 0 & a1c_ge_65 == 0
replace totdiab = 1 if (diagdiab == 1 | a1c_ge_65 == 1) & !mi(diagdiab) & !mi(a1c_ge_65)
label variable totdiab "totdiab: total diabetes (self-report diagnosis + biomarker)"
label values totdiab yesno

** Total diabetes (self-report medication + biomarker)
gen totdiab_rx =.
replace totdiab_rx = 0 if rxdiab == 0 & a1c_ge_65 == 0
replace totdiab_rx = 1 if (rxdiab == 1 | a1c_ge_65 == 1) & !mi(rxdiab) & !mi(a1c_ge_65)
label variable totdiab_rx "totdiab_rx: total diabetes (self-report medication + biomarker)"
label values totdiab_rx yesno

** Diagnosed among all with diabetes
gen diab3 =.
replace diab3 = 0 if totdiab == 0
replace diab3 = 1 if totdiab == 1 & diagdiab == 0
replace diab3 = 2 if totdiab == 1 & diagdiab == 1
label variable diab3 "diab3: diagnosed among all diabetes"
label values diab3 diab_3cat

** Diagnosed among all with diabetes
gen diag_among_diab =.
replace diag_among_diab = 0 if totdiab == 1 & diagdiab == 0
replace diag_among_diab = 1 if totdiab == 1 & diagdiab == 1
label variable diag_among_diab "diag_among_diab: diagnosed among all diabetes"
label values diag_among_diab yesno

** Indicator for death/censor
gen censordied =.
replace censordied = 0 if !mi(censor_ym) & !mi(biomarker_ym)
replace censordied = 1 if !mi(death_ym) & !mi(biomarker_ym)
label variable censordied "censordied: indicator of death/censor"
label values censordied cendied

* Time between biomarker collection and death/censor
* 	Set zero values (possible) equal to 0.5
* 	Set negative values (impossible) equal to missing
gen followtime_m = death_ym - biomarker_ym
replace followtime_m = censor_ym - biomarker_ym if mi(followtime_m)
replace followtime_m = 0.5 if followtime_m == 0
replace followtime_m = . if inrange(followtime_m,-100,-1)
label variable followtime_m "followtime_m: months between biomarker collection and death/censor"
summ followtime_m

gen followtime_y = (followtime_m / 12)
label variable followtime_y "followtime_y: years between biomarker collection and death/censor"

* Indicator for non-missing sample
gen nonmiss = 0
replace nonmiss = 1 if !mi(agecat) & !mi(gender) & !mi(educ) & !mi(hh_econ) & !mi(smoker) & !mi(bmicat) & !mi(totdiab) & !mi(e_weight) & inrange(followtime_m,0.5,999999)
label variable nonmiss "nonmiss: nonmissing sample"
label values nonmiss yesno

* Household economic status tertile
xtile econtert = hh_econ if nonmiss == 1, nquantiles(3)
label variable econtert "econtert: ELSA household economic status tertile"
label values econtert tertile

* Compress data
compress


*****************************************************************************************************
* Flow diagram
*****************************************************************************************************
gen sample = 1 //total ELSA sample
tab sample

replace sample = 0 if mi(agecat) //aged <51
tab sample

replace sample = 0 if !inrange(followtime_m,0.5,999999) //invalid followup
tab sample

replace sample = 0 if mi(hba1c) //no hba1c
tab sample

replace sample = 0 if mi(diagdiab) //missing diabetes diagnosis
tab sample

replace sample = 0 if mi(gender) //missing gender
tab sample

replace sample = 0 if mi(educ) //missing education
tab sample

replace sample = 0 if mi(hh_econ) //missing economic status
tab sample

replace sample = 0 if mi(smoker) //missing smoking status
tab sample

replace sample = 0 if mi(bmicat) //missing bmi
tab sample

replace sample = 0 if mi(e_weight) //missing weight
tab sample

tab sample nonmiss
drop sample


****************************************************************************************************
****************************************************************************************************
* Table 1
****************************************************************************************************
****************************************************************************************************
* Create excel doc
putexcel set "${output}\ELSA_diabetes_mortality_tables_${date}.xlsx", sheet("Table 1") replace

* Table formatting
* Lines
putexcel A3:B3, border(top)
putexcel A3:B3, border(bottom)
putexcel A23:B23, border(bottom)

* Font
putexcel A1:B23, font("Arial",10)

* Column width
mata: b = xl()
mata: b.load_book("${output}\ELSA_diabetes_mortality_tables_${date}.xlsx")
mata: b.set_sheet("Table 1")
mata: b.set_column_width(1,1,45)
mata: b.set_column_width(2,2,20)

* Center justify
putexcel B3:B23, hcenter

* Right justify
putexcel A13:A15, right
putexcel A18:A21, right

* Make table frame
* Title
putexcel A1 = "Table 1: Cohort characteristics"

* Top rows
putexcel B3 = "England (ELSA)"

* First column *************************************************************************************
putexcel A4 = "Survey characteristics"
putexcel A5 = "Years of data collection"
putexcel A6 = "Sample size, n"
putexcel A7 = "Deaths, n"
putexcel A8 = "Follow-up period (years), median (IQR)"
putexcel A9 = "Respondent characteristics"
putexcel A10 = "Age (years), median (IQR)"
putexcel A11 = "Female, % (95% CI)"
putexcel A12 = "Education, % (95% CI)"
putexcel A13 = "Less than upper secondary"
putexcel A14 = "Upper secondary and vocational"
putexcel A15 = "Tertiary"
putexcel A16 = "Current smoker, % (95% CI)"
putexcel A17 = "BMI, % (95% CI)"
putexcel A18 = "<18.5 (Underweight)"
putexcel A19 = "18.5-24.9 (Normal)"
putexcel A20 = "25.0-29.9 (Overweight)"
putexcel A21 = ">=30 (Obese)"
putexcel A22 = "Diabetes (diagnosed and undiagnosed), % (95% CI)"
putexcel A23 = "Diagnosed among all with diabetes, % (95% CI)"

* Fill in results
* Second column (ELSA) *********************************************************
svyset e_cluster [pw = e_weight], strata(e_strata)

* Years of data collection
putexcel B5 = "2012 to 2018"

* Sample size
tab nonmiss if nonmiss == 1, matcell(mat_x)
putexcel B6 = mat_x[1,1], nformat("#,###")

* Deaths
tab censordied if nonmiss == 1, matcell(mat_x)
putexcel B7 = mat_x[2,1], nformat("#,###")

* Follow-up period (years)
summ followtime_y if nonmiss == 1, d
local loc_x = string(r(p50),"%9.1f") + " (" + string(r(p25),"%9.1f") + " - " + string(r(p75),"%9.1f") + ")"
putexcel B8 = "`loc_x'"

* Age (years)
summ agey if nonmiss == 1 [aw = e_weight], d
local loc_x = string(r(p50),"%9.1f") + " (" + string(r(p25),"%9.1f") + " - " + string(r(p75),"%9.1f") + ")"
putexcel B10 = "`loc_x'"

* Female
svy, subpop(if nonmiss == 1): proportion gender, percent
local loc_x = string(r(table)[1,2],"%9.1f") + " (" + string(r(table)[5,2],"%9.1f") + " - " + string(r(table)[6,2],"%9.1f") + ")"
putexcel B11 = "`loc_x'"

* Education
svy, subpop(if nonmiss == 1): proportion educ, percent
forvalues n = 1/3 {
	local row = `n' + 12
	local loc_x = string(r(table)[1,`n'],"%9.1f") + " (" + string(r(table)[5,`n'],"%9.1f") + " - " + string(r(table)[6,`n'],"%9.1f") + ")"
	putexcel B`row' = "`loc_x'"
}

* Current smoker
svy, subpop(if nonmiss == 1): proportion smoker, percent
local loc_x = string(r(table)[1,2],"%9.1f") + " (" + string(r(table)[5,2],"%9.1f") + " - " + string(r(table)[6,2],"%9.1f") + ")"
putexcel B16 = "`loc_x'"

* BMI
svy, subpop(if nonmiss == 1): proportion bmicat, percent
forvalues n = 1/4 {
	local row = `n' + 17
	local loc_x = string(r(table)[1,`n'],"%9.1f") + " (" + string(r(table)[5,`n'],"%9.1f") + " - " + string(r(table)[6,`n'],"%9.1f") + ")"
	putexcel B`row' = "`loc_x'"
}

* Total diabetes
svy, subpop(if nonmiss == 1): proportion totdiab, stdize(agevar) stdweight(ageweightvar) percent
local loc_x = string(r(table)[1,2],"%9.1f") + " (" + string(r(table)[5,2],"%9.1f") + " - " + string(r(table)[6,2],"%9.1f") + ")"
putexcel B22 = "`loc_x'"

* Diagnosed among all with diabetes
svy, subpop(if nonmiss == 1): proportion diag_among_diab, stdize(agevar) stdweight(ageweightvar) percent
local loc_x = string(r(table)[1,2],"%9.1f") + " (" + string(r(table)[5,2],"%9.1f") + " - " + string(r(table)[6,2],"%9.1f") + ")"
putexcel B23 = "`loc_x'"


****************************************************************************************************
****************************************************************************************************
* Figure 1 data
****************************************************************************************************
****************************************************************************************************
* Create excel doc
putexcel set "${output}\ELSA_diabetes_mortality_tables_${date}.xlsx", sheet("Figure 1") modify

* Make table frame
* Variables
putexcel A1 = "study"
putexcel B1 = "age"
putexcel C1 = "pct"
putexcel D1 = "pct_ll"
putexcel E1 = "pct_ul"

* Study
forvalues row = 2/4 {
	putexcel A`row' = "England (ELSA)"
}

* Age
putexcel B2 = "51-59"
putexcel B3 = "60-69"
putexcel B4 = "70+"

* ELSA
svyset e_cluster [pw = e_weight], strata(e_strata)
svy, subpop(if nonmiss == 1): proportion totdiab, over(agecat) percent
forvalues n = 4/6 {
	local row = `n' - 2
	local loc_x = round(r(table)[1,`n'],0.1)
	putexcel C`row' = `loc_x'
	local loc_ll = round(r(table)[5,`n'],0.1)
	putexcel D`row' = `loc_ll'
	local loc_ul = round(r(table)[6,`n'],0.1)
	putexcel E`row' = `loc_ul'
}


****************************************************************************************************
****************************************************************************************************
* Figure 2 data
****************************************************************************************************
****************************************************************************************************
* Create excel doc
putexcel set "${output}\ELSA_diabetes_mortality_tables_${date}.xlsx", sheet("Figure 2") modify

* Make table frame
* Variables
putexcel A1 = "study"
putexcel B1 = "diabetes"
putexcel C1 = "rate"
putexcel D1 = "rate_ll"
putexcel E1 = "rate_ul"

* Study
forvalues row = 2/3 {
	putexcel A`row' = "England (ELSA)"
}

* With/without diabetes
putexcel B2 = "Without diabetes"
putexcel B3 = "Diabetes"

* ELSA
svyset e_cluster [pw = e_weight], strata(e_strata)
svy, subpop(if nonmiss == 1): poisson censordied i.totdiab i.agecat i.gender i.educ i.econtert i.smoker b2.bmicat, base irr exposure(followtime_y)
margins i.totdiab, subpop(if nonmiss == 1) predict(ir) vce(unconditional) base
forvalues n = 1/2 {
	local row = `n' + 1
	local loc_x = (r(table)[1,`n']*1000)
	putexcel C`row' = `loc_x'
	local loc_ll = (r(table)[5,`n']*1000)
  putexcel D`row' = `loc_ll'
  local loc_ul = (r(table)[6,`n']*1000)
  putexcel E`row' = `loc_ul'
}


****************************************************************************************************
****************************************************************************************************
* Figure 3, Panel A data - overall and men vs. women
****************************************************************************************************
****************************************************************************************************
* Create excel doc
putexcel set "${output}\ELSA_diabetes_mortality_tables_${date}.xlsx", sheet("Figure 3A") modify

* Make table frame
* Variables
putexcel A1 = "label"
putexcel B1 = "mrr_coef"
putexcel C1 = "mrr_lower"
putexcel D1 = "mrr_upper"
putexcel E1 = "mrr"
putexcel F1 = "mrd_coef"
putexcel G1 = "mrd_lower"
putexcel H1 = "mrd_upper"
putexcel I1 = "mrd"

* label
putexcel A2 = "England (ELSA)"

putexcel A3 = "Women"
putexcel A4 = "Men"
putexcel A5 = "Overall"

* coef, lower, upper
* ELSA
svyset e_cluster [pw = e_weight], strata(e_strata)

* Women
* MRR
svy, subpop(if nonmiss == 1 & gender == 2): poisson censordied i.totdiab i.agecat i.educ i.econtert i.smoker b2.bmicat, base irr exposure(followtime_y)
local loc_x = r(table)[1,2]
putexcel B3 = `loc_x'
local loc_ll = r(table)[5,2]
putexcel C3 = `loc_ll'
local loc_ul = r(table)[6,2]
putexcel D3 = `loc_ul'
local loc_est = string(`loc_x',"%9.2f") + " (" + string(`loc_ll',"%9.2f") + " - " + string(`loc_ul',"%9.2f") + ")"
putexcel E3 = "`loc_est'"

* MRD
margins, subpop(if nonmiss == 1 & gender == 2) dydx(i.totdiab) predict(ir) vce(unconditional) base
local loc_x = (r(table)[1,2]*1000)
putexcel F3 = `loc_x'
local loc_ll = (r(table)[5,2]*1000)
putexcel G3 = `loc_ll'
local loc_ul = (r(table)[6,2]*1000)
putexcel H3 = `loc_ul'
local loc_est = string(`loc_x',"%9.1f") + " (" + string(`loc_ll',"%9.1f") + " - " + string(`loc_ul',"%9.1f") + ")"
putexcel I3 = "`loc_est'"

* Men
* MRR
svy, subpop(if nonmiss == 1 & gender == 1): poisson censordied i.totdiab i.agecat i.educ i.econtert i.smoker b2.bmicat, base irr exposure(followtime_y)
local loc_x = r(table)[1,2]
putexcel B4 = `loc_x'
local loc_ll = r(table)[5,2]
putexcel C4 = `loc_ll'
local loc_ul = r(table)[6,2]
putexcel D4 = `loc_ul'
local loc_est = string(`loc_x',"%9.2f") + " (" + string(`loc_ll',"%9.2f") + " - " + string(`loc_ul',"%9.2f") + ")"
putexcel E4 = "`loc_est'"

* MRD
margins, subpop(if nonmiss == 1 & gender == 1) dydx(i.totdiab) predict(ir) vce(unconditional) base
local loc_x = (r(table)[1,2]*1000)
putexcel F4 = `loc_x'
local loc_ll = (r(table)[5,2]*1000)
putexcel G4 = `loc_ll'
local loc_ul = (r(table)[6,2]*1000)
putexcel H4 = `loc_ul'
local loc_est = string(`loc_x',"%9.1f") + " (" + string(`loc_ll',"%9.1f") + " - " + string(`loc_ul',"%9.1f") + ")"
putexcel I4 = "`loc_est'"

* Overall
* MRR
svy, subpop(if nonmiss == 1): poisson censordied i.totdiab i.agecat i.gender i.educ i.econtert i.smoker b2.bmicat, base irr exposure(followtime_y)
local loc_x = r(table)[1,2]
putexcel B5 = `loc_x'
local loc_ll = r(table)[5,2]
putexcel C5 = `loc_ll'
local loc_ul = r(table)[6,2]
putexcel D5 = `loc_ul'
local loc_est = string(`loc_x',"%9.2f") + " (" + string(`loc_ll',"%9.2f") + " - " + string(`loc_ul',"%9.2f") + ")"
putexcel E5 = "`loc_est'"

* MRD
margins, subpop(if nonmiss == 1) dydx(i.totdiab) predict(ir) vce(unconditional) base
local loc_x = (r(table)[1,2]*1000)
putexcel F5 = `loc_x'
local loc_ll = (r(table)[5,2]*1000)
putexcel G5 = `loc_ll'
local loc_ul = (r(table)[6,2]*1000)
putexcel H5 = `loc_ul'
local loc_est = string(`loc_x',"%9.1f") + " (" + string(`loc_ll',"%9.1f") + " - " + string(`loc_ul',"%9.1f") + ")"
putexcel I5 = "`loc_est'"


****************************************************************************************************
****************************************************************************************************
* Figure 3, Panel B data - diagnosed vs. no diabetes and undiagnosed vs. no diabetes
****************************************************************************************************
****************************************************************************************************
* Create excel doc
putexcel set "${output}\ELSA_diabetes_mortality_tables_${date}.xlsx", sheet("Figure 3B") modify

* Make table frame
* Variables
putexcel A1 = "label"
putexcel B1 = "mrr_coef"
putexcel C1 = "mrr_lower"
putexcel D1 = "mrr_upper"
putexcel E1 = "mrr"
putexcel F1 = "mrd_coef"
putexcel G1 = "mrd_lower"
putexcel H1 = "mrd_upper"
putexcel I1 = "mrd"

* label
putexcel A2 = "England (ELSA)"
putexcel A3 = "Diagnosed vs. no diabetes"
putexcel A4 = "Undiagnosed vs. no diabetes"

* coef, lower, upper, estimate
* ELSA
svyset e_cluster [pw = e_weight], strata(e_strata)

* Diagnosed vs. no diabetes
* MRR
svy, subpop(if nonmiss == 1 & inlist(diab3,0,2)): poisson censordied i.diab3 i.agecat i.gender i.educ i.econtert i.smoker b2.bmicat, base irr exposure(followtime_y)
local loc_x = r(table)[1,2]
putexcel B3 = `loc_x'
local loc_ll = r(table)[5,2]
putexcel C3 = `loc_ll'
local loc_ul = r(table)[6,2]
putexcel D3 = `loc_ul'
local loc_est = string(`loc_x',"%9.2f") + " (" + string(`loc_ll',"%9.2f") + " - " + string(`loc_ul',"%9.2f") + ")"
putexcel E3 = "`loc_est'"

* MRD
margins, subpop(if nonmiss == 1 & inlist(diab3,0,2)) dydx(i.diab3) predict(ir) vce(unconditional) base
local loc_x = (r(table)[1,2]*1000)
putexcel F3 = `loc_x'
local loc_ll = (r(table)[5,2]*1000)
putexcel G3 = `loc_ll'
local loc_ul = (r(table)[6,2]*1000)
putexcel H3 = `loc_ul'
local loc_est = string(`loc_x',"%9.1f") + " (" + string(`loc_ll',"%9.1f") + " - " + string(`loc_ul',"%9.1f") + ")"
putexcel I3 = "`loc_est'"

* Undiagnosed vs. no diabetes
* MRR
svy, subpop(if nonmiss == 1 & inlist(diab3,0,1)): poisson censordied i.diab3 i.agecat i.gender i.educ i.econtert i.smoker b2.bmicat, base irr exposure(followtime_y)
local loc_x = r(table)[1,2]
putexcel B4 = `loc_x'
local loc_ll = r(table)[5,2]
putexcel C4 = `loc_ll'
local loc_ul = r(table)[6,2]
putexcel D4 = `loc_ul'
local loc_est = string(`loc_x',"%9.2f") + " (" + string(`loc_ll',"%9.2f") + " - " + string(`loc_ul',"%9.2f") + ")"
putexcel E4 = "`loc_est'"

* MRD
margins, subpop(if nonmiss == 1 & inlist(diab3,0,1)) dydx(i.diab3) predict(ir) vce(unconditional) base
local loc_x = (r(table)[1,2]*1000)
putexcel F4 = `loc_x'
local loc_ll = (r(table)[5,2]*1000)
putexcel G4 = `loc_ll'
local loc_ul = (r(table)[6,2]*1000)
putexcel H4 = `loc_ul'
local loc_est = string(`loc_x',"%9.1f") + " (" + string(`loc_ll',"%9.1f") + " - " + string(`loc_ul',"%9.1f") + ")"
putexcel I4 = "`loc_est'"


****************************************************************************************************
****************************************************************************************************
* Appendix 6 - Adjusted all-cause mortality rates by cohort
****************************************************************************************************
****************************************************************************************************
* Create excel doc
putexcel set "${output}\ELSA_diabetes_mortality_tables_${date}.xlsx", sheet("Appendix 6") modify

* Table formatting
* Lines
putexcel A3:F3, border(top)
putexcel A3:F3, border(bottom)
putexcel A6:F6, border(bottom)

* Font
putexcel A1:F6, font("Arial",10)

* Column width
mata: b = xl()
mata: b.load_book("${output}\HRS diabetes mortality analysis ${date}.xlsx")
mata: b.set_sheet("Appendix 6")
mata: b.set_column_width(1,1,25)
mata: b.set_column_width(2,2,20)

* Center justify
putexcel B3:B6, hcenter

* Right justify
putexcel A5:A6, right

* Make table frame
* Title
putexcel A1 = "Appendix 6"

* Top rows
putexcel B3 = "England (ELSA)"

* First column *************************************************************************************
putexcel A4 = "Mortality rate"
putexcel A5 = "Without diabetes"
putexcel A6 = "Diabetes"

** Fill in results
** Second column *********************************************************
svyset e_cluster [pw = e_weight], strata(e_strata)
svy, subpop(if nonmiss == 1): poisson censordied i.totdiab i.agecat i.gender i.educ i.econtert i.smoker b2.bmicat, base irr exposure(followtime_y)
margins i.totdiab, subpop(if nonmiss == 1) predict(ir) vce(unconditional) base
forvalues n = 1/2 {
	local row = `n' + 4
	local loc_x = (r(table)[1,`n']*1000)
	local loc_ll = (r(table)[5,`n']*1000)
	local loc_ul = (r(table)[6,`n']*1000)
	local loc_est = string(`loc_x',"%9.1f") + " (" + string(`loc_ll',"%9.1f") + " - " + string(`loc_ul',"%9.1f") + ")"
	putexcel B`row' = "`loc_est'"
}


****************************************************************************************************
****************************************************************************************************
* Appendix 7 - Mortality rate ratios by age groups
****************************************************************************************************
****************************************************************************************************
* Create excel doc
putexcel set "${output}\ELSA_diabetes_mortality_tables_${date}.xlsx", sheet("Appendix 7") modify

* Make table frame
* Variables
putexcel A1 = "label"
putexcel B1 = "mrr_coef"
putexcel C1 = "mrr_lower"
putexcel D1 = "mrr_upper"
putexcel E1 = "mrr"

* label
putexcel A2 = "England (ELSA)"
putexcel A3 = "51-59"
putexcel A4 = "60-69"
putexcel A5 = "70+"

* coef, lower, upper, estimate
* ELSA
svyset e_cluster [pw = e_weight], strata(e_strata)

* 51-59
* MRR
svy, subpop(if nonmiss == 1 & agecat == 1): poisson censordied i.totdiab i.gender i.educ i.econtert i.smoker b2.bmicat, base irr exposure(followtime_y)
local loc_x = r(table)[1,2]
putexcel B3 = `loc_x'
local loc_ll = r(table)[5,2]
putexcel C3 = `loc_ll'
local loc_ul = r(table)[6,2]
putexcel D3 = `loc_ul'
local loc_est = string(`loc_x',"%9.2f") + " (" + string(`loc_ll',"%9.2f") + " - " + string(`loc_ul',"%9.2f") + ")"
putexcel E3 = "`loc_est'"

* 60-69
* MRR
svy, subpop(if nonmiss == 1 & agecat == 2): poisson censordied i.totdiab i.gender i.educ i.econtert i.smoker b2.bmicat, base irr exposure(followtime_y)
local loc_x = r(table)[1,2]
putexcel B4 = `loc_x'
local loc_ll = r(table)[5,2]
putexcel C4 = `loc_ll'
local loc_ul = r(table)[6,2]
putexcel D4 = `loc_ul'
local loc_est = string(`loc_x',"%9.2f") + " (" + string(`loc_ll',"%9.2f") + " - " + string(`loc_ul',"%9.2f") + ")"
putexcel E4 = "`loc_est'"

* 70+
* MRR
svy, subpop(if nonmiss == 1 & agecat == 3): poisson censordied i.totdiab i.gender i.educ i.econtert i.smoker b2.bmicat, base irr exposure(followtime_y)
local loc_x = r(table)[1,2]
putexcel B4 = `loc_x'
local loc_ll = r(table)[5,2]
putexcel C4 = `loc_ll'
local loc_ul = r(table)[6,2]
putexcel D4 = `loc_ul'
local loc_est = string(`loc_x',"%9.2f") + " (" + string(`loc_ll',"%9.2f") + " - " + string(`loc_ul',"%9.2f") + ")"
putexcel E4 = "`loc_est'"


****************************************************************************************************
****************************************************************************************************
* Appendix 8 data - comparisons of diagnosed vs. undiagnosed
****************************************************************************************************
****************************************************************************************************
* Create excel doc
putexcel set "${output}\ELSA_diabetes_mortality_tables_${date}.xlsx", sheet("Appendix 8") modify

* Make table frame
* Variables
putexcel A1 = "label"
putexcel B1 = "mrr_coef"
putexcel C1 = "mrr_lower"
putexcel D1 = "mrr_upper"
putexcel E1 = "mrr"
putexcel F1 = "mrd_coef"
putexcel G1 = "mrd_lower"
putexcel H1 = "mrd_upper"
putexcel I1 = "mrd"

* label
putexcel A2 = "England (ELSA)"

* coef, lower, upper, estimate
* ELSA
svyset e_cluster [pw = e_weight], strata(e_strata)

* Overall
* MRR
svy, subpop(if nonmiss == 1): poisson censordied i.diag_among_diab i.agecat i.gender i.educ i.econtert i.smoker b2.bmicat, base irr exposure(followtime_y)
local loc_x = r(table)[1,2]
putexcel B2 = `loc_x'
local loc_ll = r(table)[5,2]
putexcel C2 = `loc_ll'
local loc_ul = r(table)[6,2]
putexcel D2 = `loc_ul'
local loc_est = string(`loc_x',"%9.2f") + " (" + string(`loc_ll',"%9.2f") + " - " + string(`loc_ul',"%9.2f") + ")"
putexcel E2 = "`loc_est'"

* MRD
margins, subpop(if nonmiss == 1) dydx(i.diag_among_diab) predict(ir) vce(unconditional) base
local loc_x = (r(table)[1,2]*1000)
putexcel F2 = `loc_x'
local loc_ll = (r(table)[5,2]*1000)
putexcel G2 = `loc_ll'
local loc_ul = (r(table)[6,2]*1000)
putexcel H2 = `loc_ul'
local loc_est = string(`loc_x',"%9.1f") + " (" + string(`loc_ll',"%9.1f") + " - " + string(`loc_ul',"%9.1f") + ")"
putexcel I2 = "`loc_est'"


****************************************************************************************************
****************************************************************************************************
* Appendix 9 data - sensitivity analysis using Cox proportional hazards models instead of Poisson
*                   with an offset for log-transformed person-years and robust standard errors
****************************************************************************************************
****************************************************************************************************
* Create excel doc
putexcel set "${output}\ELSA_diabetes_mortality_tables_${date}.xlsx", sheet("Appendix 9") modify

* Make table frame
* Variables
putexcel A1 = "label"
putexcel B1 = "mrr_coef"
putexcel C1 = "mrr_lower"
putexcel D1 = "mrr_upper"
putexcel E1 = "mrr"
putexcel F1 = "hr_coef"
putexcel G1 = "hr_lower"
putexcel H1 = "hr_upper"
putexcel I1 = "hr"

* label
putexcel A2 = "England (ELSA)"

* Generate ages at entry and exit for age-on-study as the time scale in survival analyses
gen age_entry = agey
gen age_exit  = agey + followtime_y

* coef, lower, upper, estimate
* ELSA
svyset e_cluster [pw = e_weight], strata(e_strata)

* Overall
* MRR
svy, subpop(if nonmiss == 1): poisson censordied i.totdiab i.agecat i.gender i.educ i.econtert i.smoker b2.bmicat, base irr exposure(followtime_y)
local loc_x = r(table)[1,2]
putexcel B2 = `loc_x'
local loc_ll = r(table)[5,2]
putexcel C2 = `loc_ll'
local loc_ul = r(table)[6,2]
putexcel D2 = `loc_ul'
local loc_est = string(`loc_x',"%9.2f") + " (" + string(`loc_ll',"%9.2f") + " - " + string(`loc_ul',"%9.2f") + ")"
putexcel E2 = "`loc_est'"

* Cox HR
preserve
stset age_exit [pw = e_weight], failure(censordied) enter(age_entry)
svy, subpop(if nonmiss == 1): stcox i.totdiab i.agecat i.gender i.educ i.econtert i.smoker b2.bmicat, base
local loc_x = r(table)[1,2]
putexcel F2 = `loc_x'
local loc_ll = r(table)[5,2]
putexcel G2 = `loc_ll'
local loc_ul = r(table)[6,2]
putexcel H2 = `loc_ul'
local loc_est = string(`loc_x',"%9.2f") + " (" + string(`loc_ll',"%9.2f") + " - " + string(`loc_ul',"%9.2f") + ")"
putexcel I2 = "`loc_est'"
restore

* Gompertz HR
preserve

	stset age_exit [pw = e_weight], failure(censordied) enter(age_entry)
	svy, subpop(if nonmiss == 1):  ///
		streg i.totdiab i.agecat i.gender i.educ i.econtert i.smoker b2.bmicat, dist(gompertz) base

restore

* Exponential HR
preserve

	stset age_exit [pw = e_weight], failure(censordied) enter(age_entry)
	svy, subpop(if nonmiss == 1):  ///
		streg i.totdiab i.agecat i.gender i.educ i.econtert i.smoker b2.bmicat, dist(exp) base
	
restore


****************************************************************************************************
****************************************************************************************************
* Appendix 10 data - sensitivity analysis using self-report of diabetes medication instead of
*                    self-report of diabetes diagnosis
****************************************************************************************************
****************************************************************************************************
* Create excel doc
putexcel set "${output}\ELSA_diabetes_mortality_tables_${date}.xlsx", sheet("Appendix 10") modify

* Make table frame
* Variables
putexcel A1 = "label"
putexcel B1 = "mrr_coef"
putexcel C1 = "mrr_lower"
putexcel D1 = "mrr_upper"
putexcel E1 = "mrr"
putexcel F1 = "mrd_coef"
putexcel G1 = "mrd_lower"
putexcel H1 = "mrd_upper"
putexcel I1 = "mrd"

* label
putexcel A2 = "England (ELSA)"

* coef, lower, upper, estimate
* ELSA
svyset e_cluster [pw = e_weight], strata(e_strata)

* Overall
* MRR
svy, subpop(if nonmiss == 1): poisson censordied i.totdiab_rx i.agecat i.gender i.educ i.econtert i.smoker b2.bmicat, base irr exposure(followtime_y)
local loc_x = r(table)[1,2]
putexcel B2 = `loc_x'
local loc_ll = r(table)[5,2]
putexcel C2 = `loc_ll'
local loc_ul = r(table)[6,2]
putexcel D2 = `loc_ul'
local loc_est = string(`loc_x',"%9.2f") + " (" + string(`loc_ll',"%9.2f") + " - " + string(`loc_ul',"%9.2f") + ")"
putexcel E2 = "`loc_est'"

* MRD
margins, subpop(if nonmiss == 1) dydx(i.totdiab_rx) predict(ir) vce(unconditional) base
local loc_x = (r(table)[1,2]*1000)
putexcel F2 = `loc_x'
local loc_ll = (r(table)[5,2]*1000)
putexcel G2 = `loc_ll'
local loc_ul = (r(table)[6,2]*1000)
putexcel H2 = `loc_ul'
local loc_est = string(`loc_x',"%9.1f") + " (" + string(`loc_ll',"%9.1f") + " - " + string(`loc_ul',"%9.1f") + ")"
putexcel I2 = "`loc_est'"


****************************************************************************************************
****************************************************************************************************
* Appendix 11 data - sensitivity analysis not adjusting for BMI
****************************************************************************************************
****************************************************************************************************
* Create excel doc
putexcel set "${output}\ELSA_diabetes_mortality_tables_${date}.xlsx", sheet("Appendix 11") modify

* Make table frame
* Variables
putexcel A1 = "label"
putexcel B1 = "mrr_coef"
putexcel C1 = "mrr_lower"
putexcel D1 = "mrr_upper"
putexcel E1 = "mrr"
putexcel F1 = "mrd_coef"
putexcel G1 = "mrd_lower"
putexcel H1 = "mrd_upper"
putexcel I1 = "mrd"

* label
putexcel A2 = "England (ELSA)"

* coef, lower, upper, estimate
* ELSA
svyset e_cluster [pw = e_weight], strata(e_strata)

* Overall
* MRR
svy, subpop(if nonmiss == 1): poisson censordied i.totdiab i.agecat i.gender i.educ i.econtert i.smoker, base irr exposure(followtime_y)
local loc_x = r(table)[1,2]
putexcel B2 = `loc_x'
local loc_ll = r(table)[5,2]
putexcel C2 = `loc_ll'
local loc_ul = r(table)[6,2]
putexcel D2 = `loc_ul'
local loc_est = string(`loc_x',"%9.2f") + " (" + string(`loc_ll',"%9.2f") + " - " + string(`loc_ul',"%9.2f") + ")"
putexcel E2 = "`loc_est'"

* MRD
margins, subpop(if nonmiss == 1) dydx(i.totdiab) predict(ir) vce(unconditional) base
local loc_x = (r(table)[1,2]*1000)
putexcel F2 = `loc_x'
local loc_ll = (r(table)[5,2]*1000)
putexcel G2 = `loc_ll'
local loc_ul = (r(table)[6,2]*1000)
putexcel H2 = `loc_ul'
local loc_est = string(`loc_x',"%9.1f") + " (" + string(`loc_ll',"%9.1f") + " - " + string(`loc_ul',"%9.1f") + ")"
putexcel I2 = "`loc_est'"


****************************************************************************************************
* Non table statistics
****************************************************************************************************
* Results paragraph 1
count if nonmiss == 1
count if nonmiss == 1 & censordied == 1
svyset e_cluster [pw = e_weight], strata(e_strata)
svy, subpop(if nonmiss == 1): proportion educ2, percent

* Total follow-up time (years)
summ followtime_y if nonmiss == 1
disp r(sum)


****************************************************************************************************
* Close Log
****************************************************************************************************
log close

